library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(spatialOnco)
library(spatgraphs)
library(glmnetUtils)
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.16.3). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
## 
##     ar
library(bayesplot)
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
library(DHARMa)
## This is DHARMa 0.4.4. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
DATA_PATH = '../data/nbhd_coord_schurch_2020/'
RESULTS_PATH = '../results/bayes_glmnet_nbhds_01082022/'
dir.create(RESULTS_PATH)
## Warning in dir.create(RESULTS_PATH): '../results/bayes_glmnet_nbhds_01082022'
## already exists
df = read_csv(paste0(DATA_PATH,'CRC_master.csv'))
## New names:
## * `` -> ...1
## Rows: 258385 Columns: 83
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): spots, ClusterName
## dbl (81): ...1, CellID, patients, groups, size, CD44 - stroma, FOXP3 - regul...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
spot.labels = c("15_A","15_B","16_A","16_B")
keep_types = unique(df$ClusterName)

nbhds = make_spot_nbhds(df,spot.labels,keep_types, radius=50)
## Registered S3 method overwritten by 'spatstat.geom':
##   method     from
##   print.boxx cli
cv.fit.4.1 = run_model(cv.glmnet,
                      file = paste0(RESULTS_PATH,"cv.fit.4.1"),
                      formula = tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages + CD68..macrophages + stroma + plasma.cells+spot)^2,
                      data = nbhds,
                      family = poisson())

plot(cv.fit.4.1)

# frequency of tumor cell counts
table(nbhds$tumor.cells)
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 5083 1353  586  305  213  185  146  135   86   78   66   55   46   43   30   30 
##   16   17   18   19   20   21   22 
##   26   21   14   17    7    5    4
hist(nbhds$tumor.cells, breaks = 0:22)

# get an idea of the dispersion parameter in each spot
nbhds %>% 
  group_by(spot) %>%
  summarise(d_tumor = var(tumor.cells)/mean(tumor.cells))
# This chunk mainly useful for GWR models (which I was unable to get working very well, the bandwidth estimation kept failing, likely because the design matrix was singular in some locations. Could try again)
cf = coef(cv.fit.4.2, s = "lambda.1se")
terms = rownames(cf)[cf[,1] != 0]
terms.keep = unique(str_replace_all(terms,"spot.*","spot"))
terms.keep = unique(terms[-grep("spot",terms)])

terms.keep = terms.keep[-1]
fm = paste0("tumor.cells ~ ",paste(terms.keep, collapse = " + "))

feats = nbhds %>%
  named_group_split(spot, keep = FALSE)

coords = df %>% 
  rename(spot = spots) %>%
  filter(spot %in% spot.labels) %>%
  select(spot,X,Y) %>%
  named_group_split(spot, keep = FALSE)

library(GWmodel)
spdfs = mapply(SpatialPointsDataFrame,coords,feats,SIMPLIFY = F)

spplot(spdfs[[1]],"tumor.cells")

More Bayesian models

fit.4.2 <- brm(formula = bf(tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages +spot)),
  family = negbinomial(),
  data = nbhds,
  iter = 2000,
  warmup = 1000,
  cores = 4,
  file = paste0(RESULTS_PATH,"fit.4.2")
) %>% add_criterion("loo")

summary(fit.4.2)
##  Family: negbinomial 
##   Links: mu = log; shape = identity 
## Formula: tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages + spot) 
##    Data: nbhds (Number of observations: 8534) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   0.94      0.04     0.85     1.02 1.00     4478
## CD4..T.cells.CD45RO.       -0.72      0.06    -0.83    -0.61 1.00     6746
## CD68.CD163..macrophages    -0.46      0.02    -0.51    -0.42 1.00     5161
## CD8..T.cells               -0.80      0.07    -0.93    -0.66 1.00     6971
## Tregs                      -0.46      0.05    -0.56    -0.36 1.00     7224
## vasculature                -0.41      0.03    -0.46    -0.35 1.00     6901
## CD11b.CD68..macrophages    -0.32      0.17    -0.66     0.01 1.00     8318
## spot15_B                   -1.33      0.07    -1.47    -1.20 1.00     4967
## spot16_A                   -0.22      0.06    -0.33    -0.11 1.00     5227
## spot16_B                    0.51      0.05     0.41     0.60 1.00     4412
##                         Tail_ESS
## Intercept                   3270
## CD4..T.cells.CD45RO.        2458
## CD68.CD163..macrophages     2573
## CD8..T.cells                2990
## Tregs                       2713
## vasculature                 2817
## CD11b.CD68..macrophages     2845
## spot15_B                    3150
## spot16_A                    3344
## spot16_B                    3453
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     0.49      0.01     0.46     0.52 1.00     6534     3153
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
fit.4.3 <- brm(formula = bf(tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages +spot)),
  family = zero_inflated_negbinomial(),
  data = nbhds,
  iter = 2000,
  warmup = 1000,
  cores = 4,
  file = paste0(RESULTS_PATH,"fit.4.3")
) %>% add_criterion("loo")

check_brms(fit.4.2)
## DHARMa:testOutliers with type = binomial may have inflated Type I error rates for integer-valued distributions. To get a more exact result, it is recommended to re-run testOutliers with type = 'bootstrap'. See ?testOutliers for details

check_brms(fit.4.3)

summary(fit.4.3)
##  Family: zero_inflated_negbinomial 
##   Links: mu = log; shape = identity; zi = identity 
## Formula: tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages + spot) 
##    Data: nbhds (Number of observations: 8534) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   1.34      0.04     1.25     1.42 1.00     2783
## CD4..T.cells.CD45RO.       -0.75      0.05    -0.85    -0.64 1.00     4487
## CD68.CD163..macrophages    -0.45      0.02    -0.49    -0.41 1.00     4481
## CD8..T.cells               -0.79      0.06    -0.92    -0.67 1.00     4523
## Tregs                      -0.44      0.05    -0.54    -0.35 1.00     5462
## vasculature                -0.41      0.03    -0.46    -0.36 1.00     5734
## CD11b.CD68..macrophages    -0.26      0.17    -0.59     0.06 1.00     4845
## spot15_B                   -1.37      0.06    -1.49    -1.25 1.00     3523
## spot16_A                   -0.26      0.05    -0.36    -0.15 1.00     3284
## spot16_B                    0.49      0.04     0.41     0.58 1.00     3250
##                         Tail_ESS
## Intercept                   2672
## CD4..T.cells.CD45RO.        3019
## CD68.CD163..macrophages     3158
## CD8..T.cells                2946
## Tregs                       3137
## vasculature                 3261
## CD11b.CD68..macrophages     3269
## spot15_B                    2675
## spot16_A                    2733
## spot16_B                    3192
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     1.28      0.09     1.12     1.45 1.00     2723     2756
## zi        0.32      0.01     0.29     0.35 1.00     2373     2417
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

Posterior predictive checks

y = nbhds$tumor.cells
yrep.4.2 <- posterior_predict(fit.4.2, ndraws = 500)
yrep.4.3 <- posterior_predict(fit.4.3, ndraws = 500)

ppc_dens_overlay(y,yrep.4.2)

ppc_dens_overlay(y,yrep.4.3)

prop_zero <- function(x) mean(x == 0)
prop_zero(y)
## [1] 0.5956175
ppc_stat(y, yrep.4.2, stat = "prop_zero", binwidth = 0.005)

ppc_stat(y, yrep.4.3, stat = "prop_zero", binwidth = 0.005)

dispersion <- function(x) var(x)/mean(x)

ppc_stat(y, yrep.4.2, stat = "dispersion", binwidth = 0.005)

ppc_stat(y, yrep.4.3, stat = "dispersion", binwidth = 0.005)

ppc_stat_grouped(y, yrep.4.3, group = nbhds$spot, stat = "prop_zero")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat_grouped(y, yrep.4.3, group = nbhds$spot, stat = "dispersion")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

d = nbhds[nbhds$spot == "15_A",]
fit.4.3b <- brm(formula = bf(tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages)),
  family = zero_inflated_negbinomial(),
  data = d,
  iter = 2000,
  warmup = 1000,
  cores = 4,
  file = paste0(RESULTS_PATH,"fit.4.3b")
) %>% add_criterion("loo")
check.4.3b = check_brms(fit.4.3b, plot = F)
testSpatialAutocorrelation(simulationOutput = check.4.3b, x = d$X, y= d$Y)

## 
##  DHARMa Moran's I test for distance-based autocorrelation
## 
## data:  check.4.3b
## observed = 0.06536117, expected = -0.00040161, sd = 0.00078674, p-value
## < 2.2e-16
## alternative hypothesis: Distance-based autocorrelation
fit.4.4 <- brm(formula = bf(tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages +spot), zi ~ spot),
  family = zero_inflated_negbinomial(),
  data = nbhds,
  # control = list(adapt_delta = 0.99),
  iter = 2000,
  warmup = 1000,
  cores = 4,
  file = paste0(RESULTS_PATH,"fit.4.4")
) %>% add_criterion("loo")
## Registered S3 method overwritten by 'spdep':
##   method   from
##   plot.mst ape
summary(fit.4.4)
##  Family: zero_inflated_negbinomial 
##   Links: mu = log; shape = identity; zi = logit 
## Formula: tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + vasculature + CD11b.CD68..macrophages + spot) 
##          zi ~ spot
##    Data: nbhds (Number of observations: 8534) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Population-Level Effects: 
##                         Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                   1.36      0.05     1.26     1.45 1.00     2644
## zi_Intercept               -0.62      0.10    -0.82    -0.43 1.00     2357
## CD4..T.cells.CD45RO.       -0.74      0.05    -0.84    -0.63 1.00     5015
## CD68.CD163..macrophages    -0.45      0.02    -0.49    -0.41 1.00     4592
## CD8..T.cells               -0.79      0.06    -0.91    -0.66 1.00     4130
## Tregs                      -0.45      0.05    -0.54    -0.35 1.00     4792
## vasculature                -0.41      0.03    -0.46    -0.35 1.00     4443
## CD11b.CD68..macrophages    -0.30      0.16    -0.62     0.01 1.00     4793
## spot15_B                   -1.62      0.10    -1.81    -1.43 1.00     1055
## spot16_A                   -0.35      0.06    -0.46    -0.23 1.00     3003
## spot16_B                    0.46      0.05     0.37     0.56 1.00     3040
## zi_spot15_B                -1.87      2.63   -10.67    -0.44 1.01      438
## zi_spot16_A                -0.42      0.15    -0.72    -0.14 1.00     2777
## zi_spot16_B                -0.15      0.10    -0.34     0.05 1.00     2927
##                         Tail_ESS
## Intercept                   2730
## zi_Intercept                2636
## CD4..T.cells.CD45RO.        3008
## CD68.CD163..macrophages     2818
## CD8..T.cells                2833
## Tregs                       2898
## vasculature                 2744
## CD11b.CD68..macrophages     2878
## spot15_B                     998
## spot16_A                    3294
## spot16_B                    2808
## zi_spot15_B                  173
## zi_spot16_A                 2803
## zi_spot16_B                 2604
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## shape     1.21      0.09     1.04     1.40 1.00     1735     2026
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
yrep.4.4 <- posterior_predict(fit.4.4, ndraws = 500)

ppc_stat(y, yrep.4.4, stat = "prop_zero", binwidth = 0.005)

ppc_stat(y, yrep.4.4, stat = "dispersion", binwidth = 0.005)

ppc_stat_grouped(y, yrep.4.4, group = nbhds$spot, stat = "prop_zero")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ppc_stat_grouped(y, yrep.4.4, group = nbhds$spot, stat = "dispersion")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Spatial autocorrelation

d = nbhds[nbhds$spot == "15_A",]
fit.4.5 <- brm(formula = bf(tumor.cells ~ 1 + (CD4..T.cells.CD45RO. + CD68.CD163..macrophages + CD8..T.cells + Tregs + CD11b.CD68..macrophages), zi ~ 1),
  family = zero_inflated_negbinomial(),
  data = d,
  # control = list(adapt_delta = 0.99),
  iter = 2000,
  warmup = 1000,
  cores = 4,
  file = paste0(RESULTS_PATH,"fit.4.5")
) %>% add_criterion("loo")
check.4.5 = check_brms(fit.4.5, plot = F)
testSpatialAutocorrelation(simulationOutput = check.4.5, x = d$X, y= d$Y)

## 
##  DHARMa Moran's I test for distance-based autocorrelation
## 
## data:  check.4.5
## observed = 0.06832943, expected = -0.00040161, sd = 0.00078674, p-value
## < 2.2e-16
## alternative hypothesis: Distance-based autocorrelation
tibble(X = d$X, Y = d$Y, res = check.4.5$scaledResiduals) %>%
  ggplot(aes(x = X, y = Y, colour = res)) +
  geom_point(alpha=0.7) + 
  scale_colour_gradient(low = "red", high = "blue")